We take the kobo data that has one record per subject and we join
with the foodbook_daily_intakes dataset which has one
record per subject per recall, group code etc…
library(dduh.ds)
library(tidyverse) # data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
config <- dduh.ds::load_config(config_file = "../../configs/ea_data.yaml")
koboData <- readRDS(config$data_repositories$kobo)
foodbookDailyIntakes <- readRDS(config$data_repositories$foodbook_daily_intakes) |>
mutate(MealShort=if_else(str_ends(Meal, 'SNACK'), 'SNACK', 'MEAL' ))
combinedProfiles <-
koboData |> select(
participant_id,
sport_remap,
gender,
frequency_nondiet_drinks,
count_icdas_score_2_3,
count_icdas_score_2_4,
count_icdas_score_2_5,
count_icdas_score_2_6,
) |>
mutate(
combined_icdas_score_2 =
count_icdas_score_2_3 +
count_icdas_score_2_4 +
count_icdas_score_2_5 +
count_icdas_score_2_6
) |>
inner_join(foodbookDailyIntakes,
by = join_by(participant_id == User),
keep = TRUE) |>
filter(!is.na(User)) |>
select(-User)
Redoing this analysis for all recalls and overall descriptives food
for paper 2, added Fat, protein. ALso had prev only had ‘dental
nutrients’ so energy intake eg low, included all nutrients now
dentalNutStats <- function(fb24Daily, groupCols){
dentalFoodGroups <- read_csv("food-group-dental-analysis.csv")
dentalNutrients <- c("ENERGY (KCAL)", "FAT", "PROTEIN", "CHO", "SUGARS", "STARCH")
fbSubjectIntakeLong <-
fb24Daily |>
left_join(dentalFoodGroups, by = join_by(GroupCodeShort)) |>
mutate(
GroupCodeShort = if_else(is.na(GroupCodeShortId), "Others", GroupCodeShort),
GroupCodeShortIsCariogenic = case_when(
is.na(IsCariogenic) ~ "NonCariogenic",
GroupCodeShort == "Beverages Water" ~ "Water",
IsCariogenic ~ "Cariogenic",
)
) |>
mutate(MealShort = if_else(str_ends(Meal, 'SNACK'), 'SNACK', 'MEAL')) |>
select(
participant_id,
consumption,
all_of(dentalNutrients),
all_of(groupCols),
`Recall #`,
GroupCodeShort,
IsCariogenic,
) |>
pivot_longer(cols=dentalNutrients, names_to = "Nutrient", values_to = "intake") |>
group_by(across(all_of(c("participant_id", "Nutrient", groupCols)))) |>
reframe(total_consumptions=sum(consumption),
max_daily_consumptions=max(consumption),
mean_intake = mean(intake),
max_intake = max(intake),
sum_intake = sum(intake),
recalls=n_distinct(`Recall #`)
) |>
mutate(mean_daily_intake = sum_intake / recalls)
fbSubjectIntakeLong
}
nutrients <- c("ENERGY (KCAL)",
"FAT",
"PROTEIN",
"CHO",
"SUGARS",
"STARCH")
nutrientsByGender <- dentalNutStats(combinedProfiles, c("gender"))
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(dentalNutrients)
##
## # Now:
## data %>% select(all_of(dentalNutrients))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
nutrientsByGenderPivot <- nutrientsByGender |>
select(participant_id, gender, Nutrient, mean_daily_intake) |>
pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic")
p <- ggplot(nutrientsByGenderPivot) +
geom_boxplot(aes(y=value, x=statistic, fill=gender)) +
facet_wrap(Nutrient~., scales = "free_y") +
ggtitle("Nutrients daily intake")
print(p)

meanNutrientByGender <- nutrientsByGender |>
group_by(gender, Nutrient) |>
reframe(
daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
) |>
pivot_wider(names_from = gender, values_from = c(daily_mean_intake, sd_daily_intake))
meanNutrient <- nutrientsByGender |>
group_by(Nutrient) |>
reframe(
daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
)
knitr::kable(meanNutrient, caption = "Overall daily mean intake", digits = 2)
Overall daily mean intake
| CHO |
323.94 |
188.86 |
| ENERGY (KCAL) |
2677.71 |
1352.26 |
| FAT |
98.38 |
52.12 |
| PROTEIN |
139.19 |
69.51 |
| STARCH |
168.75 |
90.31 |
| SUGARS |
127.78 |
93.55 |
knitr::kable(meanNutrientByGender, caption = "Daily mean intake by gender", digits = 2)
Daily mean intake by gender
| CHO |
301.99 |
336.78 |
151.80 |
207.80 |
| ENERGY (KCAL) |
2407.42 |
2835.81 |
1085.34 |
1473.13 |
| FAT |
86.46 |
105.35 |
41.46 |
56.66 |
| PROTEIN |
119.72 |
150.59 |
56.62 |
74.19 |
| STARCH |
153.50 |
177.67 |
64.36 |
102.03 |
| SUGARS |
121.47 |
131.48 |
71.70 |
104.71 |
By meal type and cariogenic level
nutrients <- c("ENERGY (KCAL)",
"CHO",
"SUGARS",
"STARCH")
nutrientsByFoodProfile <-
dentalNutStats(combinedProfiles, c("MealShort", "GroupCodeShortIsCariogenic")) |>
filter(GroupCodeShortIsCariogenic != "Water") |>
mutate()
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nutrientsByFoodProfilePivot <- nutrientsByFoodProfile |>
filter(Nutrient %in% nutrients) |>
select(participant_id, Nutrient, mean_daily_intake, MealShort, GroupCodeShortIsCariogenic) |>
pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic")
p <- ggplot(nutrientsByFoodProfilePivot) +
geom_boxplot(aes(y=value, x=GroupCodeShortIsCariogenic, fill=MealShort)) +
facet_wrap(Nutrient~., scales = "free_y") +
xlab("meal type and cariogenic classification") +
ylab("mean daily intake (g)")
print(p)

meanNutrientByFoodProfile <- nutrientsByFoodProfile |>
group_by(Nutrient, GroupCodeShortIsCariogenic, MealShort) |>
reframe(
daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
)
knitr::kable(meanNutrientByFoodProfile, caption = "Daily mean intake by meal type and cariogenic", digits = 2)
Daily mean intake by meal type and cariogenic
| CHO |
Cariogenic |
MEAL |
86.15 |
69.47 |
| CHO |
Cariogenic |
SNACK |
73.81 |
78.88 |
| CHO |
NonCariogenic |
MEAL |
129.10 |
72.20 |
| CHO |
NonCariogenic |
SNACK |
30.14 |
32.88 |
| ENERGY (KCAL) |
Cariogenic |
MEAL |
542.93 |
423.83 |
| ENERGY (KCAL) |
Cariogenic |
SNACK |
488.85 |
449.33 |
| ENERGY (KCAL) |
NonCariogenic |
MEAL |
1392.47 |
605.84 |
| ENERGY (KCAL) |
NonCariogenic |
SNACK |
298.62 |
301.01 |
| FAT |
Cariogenic |
MEAL |
13.69 |
12.88 |
| FAT |
Cariogenic |
SNACK |
16.22 |
15.19 |
| FAT |
NonCariogenic |
MEAL |
61.11 |
31.04 |
| FAT |
NonCariogenic |
SNACK |
12.71 |
14.95 |
| PROTEIN |
Cariogenic |
MEAL |
24.65 |
25.66 |
| PROTEIN |
Cariogenic |
SNACK |
14.56 |
25.45 |
| PROTEIN |
NonCariogenic |
MEAL |
87.74 |
37.73 |
| PROTEIN |
NonCariogenic |
SNACK |
15.28 |
17.03 |
| STARCH |
Cariogenic |
MEAL |
27.77 |
23.04 |
| STARCH |
Cariogenic |
SNACK |
18.61 |
18.69 |
| STARCH |
NonCariogenic |
MEAL |
90.20 |
60.08 |
| STARCH |
NonCariogenic |
SNACK |
11.11 |
24.54 |
| SUGARS |
Cariogenic |
MEAL |
48.47 |
48.60 |
| SUGARS |
Cariogenic |
SNACK |
45.34 |
51.38 |
| SUGARS |
NonCariogenic |
MEAL |
30.90 |
25.45 |
| SUGARS |
NonCariogenic |
SNACK |
18.03 |
19.08 |
By Group Short code
nutrients <- c("ENERGY (KCAL)",
"CHO",
"SUGARS",
"STARCH")
nutrientsByFoodProfile <-
dentalNutStats(combinedProfiles, c("GroupCodeShort"))
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nutrientsByFoodProfilePivot <- nutrientsByFoodProfile |>
filter(Nutrient %in% nutrients) |>
select(participant_id, Nutrient, mean_daily_intake, GroupCodeShort) |>
pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic")
p <- ggplot(nutrientsByFoodProfilePivot) +
geom_boxplot(aes(y=value, x=statistic, fill=GroupCodeShort)) +
facet_wrap(Nutrient~., scales = "free_y") +
xlab("mean daily intake") +
ggtitle("Nutrients daily intake by food group")
print(p)

meanNutrientByGroupCodeShort <- nutrientsByFoodProfile |>
group_by(Nutrient, GroupCodeShort) |>
reframe(
daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
) |>
group_by(Nutrient) |>
arrange(desc(daily_mean_intake), .by_group=TRUE) |>
top_n(5, daily_mean_intake)
knitr::kable(meanNutrientByGroupCodeShort, caption = "Contributors to each nutrient", digits = 2)
Contributors to each nutrient
| CHO |
Others |
150.23 |
89.28 |
| CHO |
Breads, Rolls and Scones |
58.58 |
29.63 |
| CHO |
Fruit drinks and others |
52.97 |
39.07 |
| CHO |
Beverages Sugar-sweetened |
44.95 |
53.08 |
| CHO |
Breakfast cereals |
44.73 |
23.06 |
| ENERGY (KCAL) |
Others |
1603.18 |
750.13 |
| ENERGY (KCAL) |
Dietary Supplements |
327.38 |
379.83 |
| ENERGY (KCAL) |
Breads, Rolls and Scones |
302.24 |
148.29 |
| ENERGY (KCAL) |
Biscuits, Cakes and Savouries |
294.44 |
243.10 |
| ENERGY (KCAL) |
Breakfast cereals |
290.48 |
143.30 |
| FAT |
Others |
70.11 |
35.93 |
| FAT |
Sugars, Confectionary and Chocolate Confectionary |
14.73 |
5.98 |
| FAT |
Biscuits, Cakes and Savouries |
14.62 |
13.45 |
| FAT |
Creams, Ice-creams and Desserts |
10.16 |
10.02 |
| FAT |
Sugars, Confectionary and Crisps and savoury
Snacks |
9.25 |
3.69 |
| PROTEIN |
Others |
98.61 |
45.04 |
| PROTEIN |
Dietary Supplements |
34.58 |
33.49 |
| PROTEIN |
Breads, Rolls and Scones |
10.34 |
4.94 |
| PROTEIN |
Breakfast cereals |
9.80 |
5.44 |
| PROTEIN |
Yoghurt & yoghurt drinks |
9.18 |
6.68 |
| STARCH |
Others |
98.44 |
72.89 |
| STARCH |
Breads, Rolls and Scones |
52.84 |
26.74 |
| STARCH |
Breakfast cereals |
34.52 |
21.36 |
| STARCH |
Sugars, Confectionary and Crisps and savoury
Snacks |
20.73 |
8.54 |
| STARCH |
Biscuits, Cakes and Savouries |
17.93 |
13.92 |
| SUGARS |
Fruit drinks and others |
52.13 |
38.48 |
| SUGARS |
Others |
43.17 |
33.97 |
| SUGARS |
Beverages Sugar-sweetened |
40.21 |
48.44 |
| SUGARS |
Sugars, Confectionary and Chocolate Confectionary |
26.85 |
11.05 |
| SUGARS |
Creams, Ice-creams and Desserts |
20.79 |
20.51 |
Analysis by sport
combinedProfiles <-
koboData |> select(
participant_id,
sport_remap,
gender,
frequency_nondiet_drinks,
count_icdas_score_2_3,
count_icdas_score_2_4,
count_icdas_score_2_5,
count_icdas_score_2_6,
) |>
mutate(
combined_icdas_score_2 = count_icdas_score_2_3 +
count_icdas_score_2_4 +
count_icdas_score_2_5 +
count_icdas_score_2_6
) |>
inner_join(foodbookDailyIntakes,
by = join_by(participant_id == User),
keep = TRUE) |>
filter(!is.na(User)) |>
select(-User) |>
filter(!sport_remap %in% c('Gymnast', 'Triathelete'))
ggplot(koboData) +
geom_bar(aes(x=sport_remap))

p <- combinedProfiles |> select(
participant_id,
sport_remap)
print(p)
## # A tibble: 2,783 × 2
## participant_id sport_remap
## <chr> <chr>
## 1 aa944ab0 Cyclist
## 2 aa944ab0 Cyclist
## 3 aa944ab0 Cyclist
## 4 aa944ab0 Cyclist
## 5 aa944ab0 Cyclist
## 6 aa944ab0 Cyclist
## 7 aa944ab0 Cyclist
## 8 aa944ab0 Cyclist
## 9 aa944ab0 Cyclist
## 10 aa944ab0 Cyclist
## # ℹ 2,773 more rows
For each nutrients we plot the distribution by a specific facet /
dimension
nutrients <- c("ENERGY (KCAL)",
"CHO",
"SUGARS",
"STARCH")
nutrientsBySportMealGender <- dentalNutStats(combinedProfiles, c("sport_remap", "MealShort", "gender")) |>
pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){
p <- ggplot(nutrientsBySportMealGender |> filter(Nutrient == nut )) +
geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
facet_wrap(MealShort+gender~.) +
ggtitle(paste(nut, " mean_intake"))
print(p)
}




nutrientsBySportGender <- dentalNutStats(combinedProfiles, c("sport_remap", "gender")) |>
pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){
p <- ggplot(nutrientsBySportGender |> filter(Nutrient == nut )) +
geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
facet_wrap(gender~.) +
ggtitle(paste(nut, " mean_intake"))
print(p)
}




nutrientsBySportGenderGroupCodeShort <- dentalNutStats(combinedProfiles, c("sport_remap", "gender", "GroupCodeShort")) |>
pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){
p <- ggplot(nutrientsBySportGenderGroupCodeShort |> filter(Nutrient == nut )) +
geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
facet_wrap(GroupCodeShort+gender~.) +
ggtitle(paste(nut, " mean_intake"))
print(p)
}




nutrientsBySportGenderICDAS <- dentalNutStats(combinedProfiles, c("sport_remap", "gender", "combined_icdas_score_2")) |>
pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){
p <- ggplot(nutrientsBySportGenderICDAS |> filter(Nutrient == nut )) +
geom_point(aes(y=value, x=combined_icdas_score_2, color=sport_remap)) +
facet_wrap(statistic+gender~.) +
ggtitle(paste(nut, " mean_intake"))
print(p)
}




nutrientsBySportGender <- dentalNutStats(combinedProfiles, c("sport_remap", "gender")) |>
pivot_longer(cols = c("mean_daily_intake"),
names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){
p <- ggplot(nutrientsBySportGender |> filter(Nutrient == nut )) +
geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
facet_wrap(gender~.) +
ggtitle(paste(nut, "mean_daily_intake"))
print(p)
}



